-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[ARM] Have custom lowering for ucmp and scmp #149315
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-arm Author: AZero13 (AZero13) Changes: Limited to non-Thumb at the moment, but we can do this for i32 in 3 steps, using subs to set the flags initially. Full diff: https://github.com/llvm/llvm-project/pull/149315.diff 4 Files Affected:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 65d1c4e2d6515..9681eab17518b 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -802,6 +802,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::BSWAP, VT, Expand);
}
+ if (!Subtarget->isThumb()) {
+ setOperationAction(ISD::SCMP, MVT::i32, Custom);
+ setOperationAction(ISD::UCMP, MVT::i32, Custom);
+ }
+
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
@@ -10614,6 +10619,142 @@ SDValue ARMTargetLowering::LowerFP_TO_BF16(SDValue Op,
return DAG.getBitcast(MVT::i32, Res);
}
+SDValue ARMTargetLowering::LowerSCMP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ // Lowers a signed three-way compare (ISD::SCMP) to this ARM sequence:
+ // subs r0, r0, r1 ; r0 = LHS - RHS, setting the condition flags
+ // movgt r0, #1 ; LHS > RHS (signed): overwrite the result with 1
+ // mvnlt r0, #0 ; LHS < RHS (signed): overwrite with -1 (mvn #0 == -1)
+ // ; LHS == RHS: neither move fires, so r0 keeps the 0 from subs
+
+ // Optimization: when RHS is a negation (0 - X), emit ADDC on LHS and X
+ // instead of SUBC on LHS and RHS, since LHS - (0 - X) == LHS + X.
+ SDValue SubResult, Flags;
+ bool CanUseAdd = false;
+ SDValue AddOperand;
+
+ // Match RHS == (0 - X): an ISD::SUB whose first operand is constant zero.
+ if (RHS.getOpcode() == ISD::SUB) {
+ SDValue SubLHS = RHS.getOperand(0);
+ SDValue SubRHS = RHS.getOperand(1);
+
+ // The minuend must be the constant 0 for RHS to be a plain negation.
+ if (isNullConstant(SubLHS)) {
+ // For SCMP: the sub must be nsw or X provably != INT_MIN (no overflow).
+ if (RHS->getFlags().hasNoSignedWrap() || !DAG.computeKnownBits(SubRHS)
+ .getSignedMinValue()
+ .isMinSignedValue()) {
+ CanUseAdd = true;
+ AddOperand = SubRHS; // X: the flags then come from LHS + X instead of
+ // LHS - (0 - X)
+ }
+ }
+ }
+
+ if (CanUseAdd) {
+ // ADDC path: compute LHS + X, where RHS was (0 - X).
+ SDValue AddWithFlags = DAG.getNode(
+ ARMISD::ADDC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, AddOperand);
+ SubResult = AddWithFlags.getValue(0); // Value 0: the i32 sum
+ Flags = AddWithFlags.getValue(1); // Value 1: the flags (FlagsVT) from ADDS
+ } else {
+ // Default path: ARMISD::SUBC yields LHS - RHS plus flags (a SUBS).
+ SDValue SubWithFlags = DAG.getNode(
+ ARMISD::SUBC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
+ SubResult = SubWithFlags.getValue(0); // Value 0: the i32 difference
+ Flags = SubWithFlags.getValue(1); // Value 1: the flags (FlagsVT) from SUBS
+ }
+
+ // Immediates written by the two conditional moves.
+ SDValue One = DAG.getConstant(1, dl, MVT::i32);
+ SDValue MinusOne = DAG.getConstant(0xFFFFFFFF, dl, MVT::i32);
+
+ // movgt: start from the arithmetic result and replace it with 1 on GT.
+ SDValue GTCond = DAG.getConstant(ARMCC::GT, dl, MVT::i32);
+ SDValue Result1 =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, SubResult, One, GTCond, Flags);
+
+ // mvnlt: then replace that with -1 on LT (materialized as mvn #0).
+ SDValue LTCond = DAG.getConstant(ARMCC::LT, dl, MVT::i32);
+ SDValue Result2 =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Result1, MinusOne, LTCond, Flags);
+
+ if (Op.getValueType() != MVT::i32)
+ Result2 = DAG.getSExtOrTrunc(Result2, dl, Op.getValueType());
+
+ return Result2;
+}
+
+SDValue ARMTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ // Lowers an unsigned three-way compare (ISD::UCMP) to this ARM sequence:
+ // subs r0, r0, r1 ; r0 = LHS - RHS, setting the condition flags
+ // movhi r0, #1 ; LHS > RHS (unsigned): overwrite the result with 1
+ // mvnlo r0, #0 ; LHS < RHS (unsigned): overwrite the result with -1
+ // ; LHS == RHS: neither move fires, so r0 keeps the 0 from subs
+
+ // Optimization: when RHS is a negation (0 - X), emit ADDC on LHS and X
+ // instead of SUBC on LHS and RHS, since LHS - (0 - X) == LHS + X.
+ SDValue SubResult, Flags;
+ bool CanUseAdd = false;
+ SDValue AddOperand;
+
+ // Match RHS == (0 - X): an ISD::SUB whose first operand is constant zero.
+ if (RHS.getOpcode() == ISD::SUB) {
+ SDValue SubLHS = RHS.getOperand(0);
+ SDValue SubRHS = RHS.getOperand(1);
+
+ // The minuend must be the constant 0 for RHS to be a plain negation.
+ if (isNullConstant(SubLHS)) {
+ // For UCMP: the add form is used only when X is provably non-zero.
+ if (DAG.isKnownNeverZero(SubRHS)) {
+ CanUseAdd = true;
+ AddOperand = SubRHS; // X: the flags then come from LHS + X instead of
+ // LHS - (0 - X)
+ }
+ }
+ }
+
+ if (CanUseAdd) {
+ // ADDC path: compute LHS + X, where RHS was (0 - X).
+ SDValue AddWithFlags = DAG.getNode(
+ ARMISD::ADDC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, AddOperand);
+ SubResult = AddWithFlags.getValue(0); // Value 0: the i32 sum
+ Flags = AddWithFlags.getValue(1); // Value 1: the flags (FlagsVT) from ADDS
+ } else {
+ // Default path: ARMISD::SUBC yields LHS - RHS plus flags (a SUBS).
+ SDValue SubWithFlags = DAG.getNode(
+ ARMISD::SUBC, dl, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
+ SubResult = SubWithFlags.getValue(0); // Value 0: the i32 difference
+ Flags = SubWithFlags.getValue(1); // Value 1: the flags (FlagsVT) from SUBS
+ }
+
+ // Immediates written by the two conditional moves.
+ SDValue One = DAG.getConstant(1, dl, MVT::i32);
+ SDValue MinusOne = DAG.getConstant(0xFFFFFFFF, dl, MVT::i32);
+
+ // movhi: start from the arithmetic result and replace it with 1 on HI.
+ SDValue HICond = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
+ SDValue Result1 =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, SubResult, One, HICond, Flags);
+
+ // mvnlo: then replace that with -1 on LO (unsigned lower).
+ SDValue LOCond = DAG.getConstant(ARMCC::LO, dl, MVT::i32);
+ SDValue Result2 =
+ DAG.getNode(ARMISD::CMOV, dl, MVT::i32, Result1, MinusOne, LOCond, Flags);
+
+ if (Op.getValueType() != MVT::i32)
+ Result2 = DAG.getSExtOrTrunc(Result2, dl, Op.getValueType());
+
+ return Result2;
+}
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
switch (Op.getOpcode()) {
@@ -10742,6 +10883,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_TO_BF16:
return LowerFP_TO_BF16(Op, DAG);
case ARMISD::WIN__DBZCHK: return SDValue();
+ case ISD::SCMP:
+ return LowerSCMP(Op, DAG);
+ case ISD::UCMP:
+ return LowerUCMP(Op, DAG);
}
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 5f4aef55b22c9..1cb7edd041b32 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -903,6 +903,8 @@ class VectorType;
void LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSCMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUCMP(SDValue Op, SelectionDAG &DAG) const;
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
diff --git a/llvm/test/CodeGen/ARM/scmp.ll b/llvm/test/CodeGen/ARM/scmp.ll
index 6e493c993751c..9189aee6aaf43 100644
--- a/llvm/test/CodeGen/ARM/scmp.ll
+++ b/llvm/test/CodeGen/ARM/scmp.ll
@@ -4,12 +4,9 @@
define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind {
; CHECK-LABEL: scmp_8_8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: movwgt r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: mvnlt r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.scmp(i8 %x, i8 %y)
ret i8 %1
@@ -18,12 +15,9 @@ define i8 @scmp_8_8(i8 signext %x, i8 signext %y) nounwind {
define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind {
; CHECK-LABEL: scmp_8_16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: movwgt r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: mvnlt r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.scmp(i16 %x, i16 %y)
ret i8 %1
@@ -32,12 +26,9 @@ define i8 @scmp_8_16(i16 signext %x, i16 signext %y) nounwind {
define i8 @scmp_8_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scmp_8_32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: movwgt r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: mvnlt r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.scmp(i32 %x, i32 %y)
ret i8 %1
@@ -92,17 +83,26 @@ define i8 @scmp_8_128(i128 %x, i128 %y) nounwind {
define i32 @scmp_32_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: scmp_32_32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlt r0, #1
-; CHECK-NEXT: movwgt r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: mvnlt r0, #0
; CHECK-NEXT: bx lr
%1 = call i32 @llvm.scmp(i32 %x, i32 %y)
ret i32 %1
}
+define i32 @scmp_neg(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: scmp_neg:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: adds r0, r0, r1
+; CHECK-NEXT: movwgt r0, #1
+; CHECK-NEXT: mvnlt r0, #0
+; CHECK-NEXT: bx lr
+ %yy = sub nsw i32 0, %y
+ %1 = call i32 @llvm.scmp(i32 %x, i32 %yy)
+ ret i32 %1
+}
+
define i32 @scmp_32_64(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: scmp_32_64:
; CHECK: @ %bb.0:
diff --git a/llvm/test/CodeGen/ARM/ucmp.ll b/llvm/test/CodeGen/ARM/ucmp.ll
index ad4af534ee8fe..bb0201454d1ea 100644
--- a/llvm/test/CodeGen/ARM/ucmp.ll
+++ b/llvm/test/CodeGen/ARM/ucmp.ll
@@ -4,12 +4,9 @@
define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
; CHECK-LABEL: ucmp_8_8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlo r0, #1
-; CHECK-NEXT: movwhi r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwhi r0, #1
+; CHECK-NEXT: mvnlo r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i8 %x, i8 %y)
ret i8 %1
@@ -18,12 +15,9 @@ define i8 @ucmp_8_8(i8 zeroext %x, i8 zeroext %y) nounwind {
define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
; CHECK-LABEL: ucmp_8_16:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlo r0, #1
-; CHECK-NEXT: movwhi r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwhi r0, #1
+; CHECK-NEXT: mvnlo r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i16 %x, i16 %y)
ret i8 %1
@@ -32,12 +26,9 @@ define i8 @ucmp_8_16(i16 zeroext %x, i16 zeroext %y) nounwind {
define i8 @ucmp_8_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp_8_32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlo r0, #1
-; CHECK-NEXT: movwhi r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwhi r0, #1
+; CHECK-NEXT: mvnlo r0, #0
; CHECK-NEXT: bx lr
%1 = call i8 @llvm.ucmp(i32 %x, i32 %y)
ret i8 %1
@@ -92,12 +83,9 @@ define i8 @ucmp_8_128(i128 %x, i128 %y) nounwind {
define i32 @ucmp_32_32(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ucmp_32_32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: cmp r0, r1
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: mov r2, #0
-; CHECK-NEXT: movwlo r0, #1
-; CHECK-NEXT: movwhi r2, #1
-; CHECK-NEXT: sub r0, r2, r0
+; CHECK-NEXT: subs r0, r0, r1
+; CHECK-NEXT: movwhi r0, #1
+; CHECK-NEXT: mvnlo r0, #0
; CHECK-NEXT: bx lr
%1 = call i32 @llvm.ucmp(i32 %x, i32 %y)
ret i32 %1
|
ce71cf8
to
6719da6
Compare
Okay we need the mattr |
@davemgreen okay so what do you think |
✅ With the latest revision this PR passed the C/C++ code formatter. |
b5a8f68
to
5f9627e
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Other than the comment below this LGTM.
Limited to non-thumb at the moment, but we can do this for i32 in 3 steps, using subs to set the flags initially.
Limited to non-Thumb1 for scmp at the moment, since there is no good way to lower it on Thumb1.